# .........................................................................
# Title: consolidate_firm_sample.py
#
# Merges firm-level outcome and covariates data to generate consolidated
# panels for firm-level regressions
# .........................................................................

import pandas as pd
import numpy as np
import econtools.metrics as mt
from econtools.util.frametools import group_id

# directories
PROJECT_PATH = "<PROJECT_PATH>/"
TMP = PROJECT_PATH + "temp/"

# ---------------------------------------
# Investment
# ---------------------------------------

# read in data
compustat = pd.read_stata(TMP + "compustat_up_essentials_y.dta").rename(columns={'fyear': 'year'}).drop(columns=['_firm_id'])
ownership = pd.read_stata(TMP + "up_ownership.dta")
ratings = pd.read_stata(TMP + "issuer_rating_cat_up.dta")
industry = pd.read_stata(TMP + "compustat_up_industry.dta")
compustat['capx_aqc_at'] = compustat.capx_at + compustat.aqc_at
compustat = compustat[compustat.year >= 2004]

# covariate data
covariates = pd.read_stata(TMP + "up_investment_summary_y.dta").drop(
    columns=['issuer_rating_cat', 'issuer_rating_cat_pm', 'share_ins_issuance', '_issuer_rating_cat', '_issuer_rating_cat_pm', 'naics2', 'capx_at', 'capx_aqc_at']
)

# firm sizes
sizes = compustat[compustat.year == 2007][['firm_id', 'at']].rename(columns={'at': 'at_baseline'})
sizes['log_at_baseline'] = np.log(sizes.at_baseline)
sizes['ones'] = 1;

# other values at baseline
baseline_vals = covariates[covariates.year == 2007][['firm_id', 'ebit_at', 'dt_ebit', 'dt_at', 'dltt_at']].rename(
    columns={'ebit_at': 'ebit_at_baseline', 'dt_ebit': 'dt_ebit_baseline', 'dt_at': 'dt_at_baseline', 'dltt_at': 'dltt_at_baseline'}
)

# investment sample
investment_sample = compustat.merge(ownership, on='firm_id', how='left').dropna(subset=['share_ins_issuance'])
investment_sample = investment_sample.merge(ratings, on='firm_id', how='left').merge(industry, on='firm_id', how='left')
investment_sample = investment_sample.merge(covariates, on=['firm_id', 'year'], how='left')
investment_sample = investment_sample[investment_sample.issuer_rating_cat.isin(['AAA', 'AA', 'A', 'BBB'])]
investment_sample = investment_sample[investment_sample.year.between(2004, 2011)]
investment_sample = investment_sample.merge(sizes, on='firm_id', how='left')
investment_sample = investment_sample.merge(baseline_vals, on='firm_id', how='left')
investment_sample['size_decile'] = pd.qcut(investment_sample.log_at_baseline, q=10, labels=False)
investment_sample['profitability_decile'] = pd.qcut(investment_sample.ebit_at_baseline, q=10, labels=False)
investment_sample['leverage_decile'] = pd.qcut(investment_sample.dltt_at_baseline, q=10, labels=False)
investment_sample['size_z'] = investment_sample.log_at_baseline / investment_sample.log_at_baseline.std()
investment_sample['profitability_z'] = investment_sample.ebit_at_baseline / investment_sample.ebit_at_baseline.std()
investment_sample['leverage_z'] = investment_sample.dltt_at_baseline / investment_sample.dltt_at_baseline.std()
investment_sample.to_stata(TMP + "consolidated_inv_sample.dta")

# ---------------------------------------
# Issuance
# ---------------------------------------

# issuance sample
issuance = pd.read_stata(TMP + "issuance_indicators_up.dta").rename(columns={'issuance_year': 'year'})
issuance_sample = issuance.merge(ownership, on='firm_id', how='left').dropna(subset=['share_ins_issuance'])
issuance_sample = issuance_sample.merge(ratings, on='firm_id', how='left')
issuance_sample = issuance_sample[issuance_sample.issuer_rating_cat.isin(['AAA', 'AA', 'A', 'BBB'])]
issuance_sample = issuance_sample[issuance_sample.year.between(2004, 2011)]
issuance_sample = issuance_sample.merge(sizes, on='firm_id', how='left')
issuance_sample = issuance_sample.merge(investment_sample[['firm_id', 'year', 'naics2', 'size_decile', 'leverage_decile']], on=['firm_id', 'year'], how='left')

# dummies
years = range(2004, 2012)
for year in years:
    issuance_sample['share_{}'.format(year)] = issuance_sample.share_ins_issuance * (issuance_sample.year == year).astype(int)
    issuance_sample['dummy_{}'.format(year)] = (issuance_sample.year == year).astype(int)
issuance_sample.to_stata(TMP + "/consolidated_issuance_sample.dta")

# ---------------------------------------
# Offering yields
# ---------------------------------------

# yields sample
yields = pd.read_stata(TMP + "firm_offer_yields.dta").rename(columns={'issuance_year': 'year'})
yields_sample = yields.merge(ownership, on='firm_id', how='left').dropna(subset=['share_ins_issuance'])
yields_sample = yields_sample.merge(ratings, on='firm_id', how='left')
yields_sample = yields_sample[yields_sample.issuer_rating_cat.isin(['AAA', 'AA', 'A', 'BBB'])]
yields_sample = yields_sample[yields_sample.year.between(2004, 2011)]
yields_sample = yields_sample.merge(sizes, on='firm_id', how='left')
yields_sample = yields_sample.merge(investment_sample[['firm_id', 'year', 'naics2', 'size_decile', 'leverage_decile']], on=['firm_id', 'year'], how='left')

# dummies
years = range(2004, 2012)
for year in years:
    yields_sample['share_{}'.format(year)] = yields_sample.share_ins_issuance * (yields_sample.year == year).astype(int)
    yields_sample['dummy_{}'.format(year)] = (yields_sample.year == year).astype(int)
    
yields_sample.to_stata(TMP + "/consolidated_off_yields_sample.dta")
